# Not sure if necessary... was an issue with a version conflict... try new version in the future again...
# devtools::install_version("RPostgres", version = "1.2.1", repos = "http://cran.us.r-project.org")

### General options
# Ask R for English-language messages (the effect of LANG is platform dependent).
Sys.setenv(LANG = "en")
# Avoid scientific notation (scipen) and print 4 significant digits.
options(scipen = 100, digits = 4)

### Workspace
# `rm(list = ls())` was removed on purpose: run interactively it silently
# deletes everything in the user's global environment, yet it does not give a
# clean session (attached packages and options survive). Restart R instead.
# Close graphics devices left open by earlier runs.
graphics.off()

### Load packages (maybe need to be installed first)
# Standard
library(tidyverse) # General DS toolkit
library(magrittr) # For advanced piping

# Databases
library(DBI) # GEneral R database interface
library(RPostgres) # PostgreSQL interface driver 
library(dbplyr) # for dplyr with databases

# networks
library(tidygraph)
library(ggraph)
library(ggrepel)

# Load functions
source("functions/functions_relatedness.R")
# Bring the database settings into the session; the var_* objects used below
# are presumably defined by this script (it is not visible from here).
source("../variables.R")

# Open a connection to the existing PostgreSQL database, requiring SSL.
con <- DBI::dbConnect(
  RPostgres::Postgres(),
  dbname = var_dbname,
  host = var_host,
  port = var_port,
  user = var_user,
  password = var_password,
  sslmode = "require"
)

# Remove the connection settings (including the password) from the workspace.
rm(list = c("var_dbname", "var_host", "var_password", "var_port", "var_user"))

1 Introduction

  • In this notebook, we present the first results of an analysis of green development paths of Nordic regions
  • It is based on patent data from 1990-2015 (PATSTAT, Autumn 2021 Edition)
  • Analysis is done on all Nordic NUTS 3 regions (fractionalized patent allocation by inventor location, DOCDB family level)
  • Industries are captured by NACE2 codes of patents according to the OECD IPC-NACE2 concordance table.
  • Green patents are identified using the Y02 tag in the CPC classification

1.1 Preprocessing

# con %>% dbListTables() %>% sort()
## LOAD DATA

# Patent tables, read from the .rds files in ../temp
data_appln         <- readr::read_rds("../temp/data_appln.rds")
data_docdb_fam_cpc <- readr::read_rds("../temp/data_docdb_fam_cpc.rds")
data_pers_appln    <- readr::read_rds("../temp/data_pers_appln.rds")
data_person        <- readr::read_rds("../temp/data_person.rds")
data_nace2         <- readr::read_rds("../temp/data_nace2.rds")

# Additional objects: technology graph and region-level tables
g_tech      <- readr::read_rds("../temp/g_tech.rds")
region_RTA  <- readr::read_rds("../temp/region_RTA.rds")
region_tech <- readr::read_rds("../temp/region_tech.rds")
## Analysis parameters
# Number of top regions (ranked by green patent count) kept for the plots.
n_regions <- 10
# Patent-count threshold; the misspelled name is kept because other code may
# refer to it.
cuttoff <- 50
# Per-region patent totals, shared by both region selections below.
#   n       = sum of weight_frac over all patents of the region
#   n_Y     = sum of weight_frac over the Y-tagged (green) patents
#   share_Y = green share of the region's patents
# `n` already contains the green patents, so the share is n_Y / n; the earlier
# n_Y / (n + n_Y) counted the green patents twice in the denominator.
region_summary <- region_tech %>%
  group_by(nuts) %>%
  summarise(n = sum(weight_frac, na.rm = TRUE),
            n_Y = sum(weight_frac * Y_tag, na.rm = TRUE),
            .groups = "drop") %>%
  mutate(share_Y = n_Y / n) %>%
  arrange(desc(n_Y))

# Select region: hand-picked regions that are always part of the analysis
select_regions <- region_summary %>%
  filter(nuts %in% c('SE232', 'NO043', 'DK012'))

# Restrict to top N regions (by green patent count) plus the hand-picked ones
top_regions <- region_summary %>%
  #filter(n_Y >= cuttoff ) %>%
  # seq_len() is safe for n_regions == 0, where 1:n_regions would yield c(1, 0)
  slice(seq_len(n_regions)) %>%
  bind_rows(select_regions) %>%
  # a hand-picked region may already be in the top N: keep one row per region
  distinct(nuts, .keep_all = TRUE)
# Create a data frame with the technology relatedness edge list.
# The integer node indices in from/to are replaced by the node names, and
# pct_rank holds the percent rank of each edge weight.
tech_rel <- g_tech %E>%
  mutate(
    from_nace = .N()$name[from],
    to_nace = .N()$name[to]
  ) %>%
  as_tibble() %>%
  mutate(
    from = as.character(from_nace),
    to = as.character(to_nace),
    pct_rank = percent_rank(weight)
  ) %>%
  arrange(from, to)

# Make the edge list symmetric: append every edge with from/to swapped, then
# keep the first row of each (from, to) pair. Only from/to are swapped; the
# from_nace/to_nace columns keep their original orientation.
tech_rel <- tech_rel %>%
  rename(from = to, to = from) %>%
  relocate(from, to) %>%
  bind_rows(tech_rel, .) %>%
  distinct(from, to, .keep_all = TRUE)
# Augment the regions data with a country code (first two characters of the
# nuts code).
# NOTE: should be done earlier, when the table is constructed
region_RTA <- region_RTA %>%
  mutate(country = str_sub(nuts, 1, 2))
# Summarize regions: one row per region (nuts), period and Y tag.
#   n_spec       = sum of rta_bin, i.e. number of fields flagged in rta_bin
#   n_spec_count = sum of n_tech_region over the flagged fields
#   HHI          = sum of squared percentage shares of n_tech_region
# HHI now ignores missing values like the other two summaries, so a single NA
# no longer turns the whole index into NA. `.groups = "drop"` returns an
# ungrouped table without the summarise() grouping message.
region_RTA_agg <- region_RTA %>%
  group_by(nuts, period, Y_tag) %>%
  summarise(
    n_spec = sum(rta_bin, na.rm = TRUE),
    n_spec_count = sum(n_tech_region * rta_bin, na.rm = TRUE),
    HHI = sum((n_tech_region / sum(n_tech_region, na.rm = TRUE) * 100)^2,
              na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(country = str_sub(nuts, 1, 2),
         nuts_period = paste(nuts, 'P', period))
`summarise()` has grouped output by 'nuts', 'period'. You can override using the `.groups` argument.

2 Patent application development

  • In the following, a brief descriptive analysis of the development of green and non-green patent applications in the Nordics
  • In addition, a breakdown of green patents by top green patenting regions
# Region x technology-field table with the change in specialization.
# rta_dev compares rta_bin with the preceding row of the same
# (nuts, nace_group, Y_tag) series after sorting by period:
#   -1 = lower than before, 0 = unchanged, 1 = higher than before,
#   NA where there is no preceding row or a value is missing.
tech_dev <- region_RTA %>%
  select(period, country, nuts, nace_group, Y_tag, n_tech_region, rta, rta_bin) %>%
  arrange(nuts, nace_group, Y_tag, period) %>%
  group_by(nuts, nace_group, Y_tag) %>%
  mutate(
    rta_dev = case_when(
      rta_bin > lag(rta_bin, 1) ~ 1,
      rta_bin < lag(rta_bin, 1) ~ -1,
      rta_bin == lag(rta_bin, 1) ~ 0
    )
  ) %>%
  ungroup()
# Line chart: number of applications per filing year, one line per Y tag value.
ggplot(
  data = count(data_appln, appln_filing_year, Y_tag),
  mapping = aes(x = appln_filing_year, y = n, colour = Y_tag)
) +
  geom_line(key_glyph = "timeseries") +
  labs(
    x = 'Year',
    y = 'Number applications',
    colour = 'Green',
    title = 'Patent applications: Development',
    subtitle = 'All Nordic contries, by Y tag'
  )

3 Technology space general

  • We calculate the relatedness of industries by co-occurrence patterns following Hidalgo & Hausmann (2007)
  • Revealed technological advantage (RTA) is calculated separately for Y-tag and non-Y-tag patents.
# Line chart for the top regions only: weight_frac-weighted applications per
# filing year and region, one facet per Y tag value with free axis scales.
data_pers_appln %>%
  semi_join(top_regions, by = 'nuts') %>%
  count(appln_filing_year, nuts, Y_tag, wt = weight_frac) %>%
  ggplot(mapping = aes(x = appln_filing_year, y = n, colour = nuts)) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(Y_tag), scales = 'free') +
  labs(
    x = 'Year',
    y = 'Number applications, by region and Y tag',
    colour = 'Nuts3',
    title = 'Patent applications: Development',
    subtitle = 'All Nordic contries'
  )

# Fixed Fruchterman-Reingold layout of the technology graph; the seed keeps the
# node positions reproducible between runs.
set.seed(1337)
# layout_with_fr() is the current name of the deprecated
# layout.fruchterman.reingold(). It returns an unnamed two-column matrix, so
# the columns are named before the conversion to avoid tibble's name-repair
# warning for unnamed matrices.
coords_tech <- igraph::layout_with_fr(g_tech)
colnames(coords_tech) <- c("x", "y")
coords_tech <- as_tibble(coords_tech)

4 Regional specialization (RTA) development

  • Comparison of specialization profiles in periods 1 and 2
# Network plot of the industry space on the precomputed layout (coords_tech).
# Node labels are truncated to 50 characters and drawn only for nodes whose
# dgr lies in the upper half of the percent ranks.
g_tech %N>%
  mutate(nace_group_name = str_trunc(nace_group_name, width = 50, side = 'right')) %>%
  ggraph(layout = coords_tech) +
  geom_edge_link(aes(width = weight, alpha = weight), colour = "grey") +
  geom_node_point(aes(colour = nace_sec_name, size = dgr)) +
  geom_node_text(
    aes(label = nace_group_name, size = dgr, filter = percent_rank(dgr) >= 0.5),
    repel = TRUE
  ) +
  theme_void() +
  theme(legend.position = "bottom") +
  labs(
    title = 'Industry Space (all Nordics)',
    subtitle = 'Nodes = NACE 2 Industries. Edges: Relatedness'
  )

# p1: top regions, one row per region and period; the three summaries are
# spread into *_Y_tag_FALSE / *_Y_tag_TRUE columns (missing combinations = 0).
p1 <- region_RTA_agg %>%
  semi_join(top_regions, by = 'nuts') %>%
  pivot_wider(
    names_from = Y_tag,
    names_prefix = 'Y_tag_',
    values_from = c(n_spec, n_spec_count, HHI),
    values_fill = 0
  )

# p2: one row per region, the specialization counts of each period side by
# side (column suffix = period value).
p2 <- p1 %>%
  select(period, nuts, n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE) %>%
  pivot_wider(
    names_from = period,
    values_from = c(n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE)
  )
# Scatter of non-green vs. green specialization counts per region and period
# (p1), with an arrow per region from its period-1 to its period-2 position
# (p2; the _1/_2 column suffixes are the period values).
p1 %>%
  ggplot(aes(x = n_spec_Y_tag_FALSE, y = n_spec_Y_tag_TRUE)) +
  geom_point(aes(size = n_spec_count_Y_tag_TRUE, col = HHI_Y_tag_TRUE)) +
  geom_text(aes(label = nuts), position = position_dodge(0.9), alpha = 0.75) +
  geom_segment(data = p2,
               aes(x = n_spec_Y_tag_FALSE_1,
                   y = n_spec_Y_tag_TRUE_1,
                   xend = n_spec_Y_tag_FALSE_2,
                   yend = n_spec_Y_tag_TRUE_2),
               # constant width set outside aes(): inside aes() the value 1
               # was treated as data and trained into the size scale that the
               # points share
               size = 1,
               alpha = 0.15,
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"),
               show.legend = FALSE) +
  # NOTE(review): the HHI computed in this notebook is built from percentage
  # shares (x 100), so midpoint = 1 sits at the very bottom of its range --
  # confirm the intended midpoint.
  scale_color_gradient2(low = "skyblue", mid = 'yellow', high = "red", midpoint = 1) +
  # the empty `note = ''` entry was dropped: `note` is not a label that labs()
  # draws
  labs(title = 'Development of new regional specializations',
       subtitle = 'By number of green and non green specializations in period 1 and 2',
       x = 'N non-green specializations',
       y = 'N green specializations',
       size = 'N green patents',
       col = 'HHI green patents')

5 Analysis for existing green paths:

# Remove both plotting helpers; the earlier `rm(p2, p2)` left p1 behind and
# warned that p2 was not found on its second mention.
rm(p1, p2)
Warning: object 'p2' not found
# Regions with an existing green path in period 1: at least one green field
# flagged in rta_bin and at least 100 patents (n_tech_region) in such fields.
#
# summarise() evaluates its expressions in order, and a summary that reuses an
# input column's name replaces that column for the expressions after it. The
# region total `n_tech_region` is therefore computed LAST; previously it came
# first, so n_green_region and n_green_rta multiplied by the region total
# instead of the per-field counts.
select_regions_green <- tech_dev %>%
  group_by(nuts, period) %>%
  summarise(
    green = sum(Y_tag * rta, na.rm = TRUE),
    green_bin = sum(Y_tag * rta_bin, na.rm = TRUE),
    n_green_region = sum(Y_tag * n_tech_region, na.rm = TRUE),
    n_green_rta = sum(Y_tag * n_tech_region * rta_bin, na.rm = TRUE),
    n_tech_region = sum(n_tech_region, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(n_green_rta >= 100,
         green_bin >= 1,
         period == '1') %>%
  select(nuts)
`summarise()` has grouped output by 'nuts'. You can override using the `.groups` argument.
# Net change in specializations per region: sum of rta_dev over all fields
# with at least 50 patents (n_tech_region), separately for green and non-green
# fields, spread into the columns Y_spec_FALSE / Y_spec_TRUE (missing = 0).
# `.groups = "drop"` returns an ungrouped table without the summarise()
# grouping message.
tech_spec_dev <- tech_dev %>%
  filter(n_tech_region >= 50) %>%
  group_by(nuts, Y_tag) %>%
  summarise(rta_dev = sum(rta_dev, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rta_dev, values_fill = 0, names_prefix = 'Y_spec_')
`summarise()` has grouped output by 'nuts'. You can override using the `.groups` argument.
# Relatedness of each new green specialization (period 2) to the fields the
# same region was already specialized in during period 1.
# Result: one row per region (nuts), new green field (nace_group) and Y tag of
# the related period-1 specialization, with the max, sum and mean edge weight.
# `.groups = "drop"` returns an ungrouped table without the summarise()
# grouping message.
tech_rel_dev <- tech_rel %>%
  select(from, to, weight) %>%
  # replicate every edge for each region in which the `from` field occurs
  left_join(tech_dev %>% distinct(nace_group, nuts), by = c('from' = 'nace_group')) %>%
  # filter for rta in period 1 (Y_tag now describes the related `to` field)
  inner_join(
    tech_dev %>%
      filter(period == '1', rta_bin == 1) %>%
      select(nace_group, nuts, Y_tag),
    by = c('to' = 'nace_group', 'nuts')
  ) %>%
  # filter for new green specialization in period 2
  semi_join(
    tech_dev %>% filter(period == '2', rta_bin == 1, rta_dev == 1, Y_tag == TRUE),
    by = c('from' = 'nace_group', 'nuts')
  ) %>%
  rename(nace_group = from, related_techn = to) %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel_max = max(weight),
            rel_sum = sum(weight),
            rel_mean = mean(weight),
            .groups = "drop")
`summarise()` has grouped output by 'nuts', 'nace_group'. You can override using the `.groups` argument.
# Region-level scatter: mean (over new green specializations) of the largest
# relatedness to period-1 non-green (x) and green (y) specializations.
# Point size = patents (n_tech_region) in green fields flagged in rta_bin in
# period 2; only regions in the upper half of that count are shown.
tech_rel_dev %>%
  group_by(nuts, Y_tag) %>%
  summarise(rel = mean(rel_max), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(
    tech_dev %>%
      filter(Y_tag == TRUE, period == '2', rta_bin == 1) %>%
      count(nuts, wt = n_tech_region),
    by = 'nuts'
  ) %>%
  mutate(country = str_sub(nuts, 1, 2)) %>%
  #semi_join(top_regions, by = 'nuts') %>%
  filter(0.5 <= percent_rank(n)) %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n)) +
  geom_point(aes(col = country)) +
  geom_text_repel(aes(label = nuts), box.padding = 0.5, max.overlaps = Inf) +
  theme(legend.position = "bottom") +
  # `caption` instead of `note`: labs() has no `note` label, so the
  # explanatory text was never drawn
  labs(title = 'New green specialization period 2',
       subtitle = 'By nuts regions',
       caption = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
       x = 'Relatedness non-green',
       y = 'Relatedness green',
       size = 'N green patents')
`summarise()` has grouped output by 'nuts'. You can override using the `.groups` argument.

6 TODO

# Field-level scatter for the top regions, one facet per region: for every new
# green specialization, its largest relatedness to period-1 non-green (x) and
# green (y) specializations. Point size = patents (n_tech_region) of that green
# field in period 2.
tech_rel_dev %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel = mean(rel_max), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(
    tech_dev %>%
      filter(Y_tag == TRUE, period == '2', rta_bin == 1) %>%
      count(nuts, nace_group, wt = n_tech_region),
    by = c('nuts', 'nace_group')
  ) %>%
  mutate(country = str_sub(nuts, 1, 2)) %>%
  semi_join(top_regions, by = 'nuts') %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n)) +
  geom_point() +
  geom_text_repel(aes(label = nace_group), box.padding = 0.5) +
  facet_wrap(vars(nuts)) +
  # `caption` instead of `note`: labs() has no `note` label, so the
  # explanatory text was never drawn
  labs(title = 'New green specialization period 2',
       subtitle = 'By nuts regions',
       caption = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
       x = 'Relatedness non-green',
       y = 'Relatedness green',
       size = 'N green patents')
`summarise()` has grouped output by 'nuts', 'nace_group'. You can override using the `.groups` argument.

---
title: 'Green Regional Path paper: First results'
author: "Daniel S. Hain"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_notebook:
    df_print: paged
    toc: yes
    toc_depth: 3
    toc_float: yes
    number_sections: yes
    code_folding: hide
---


```{r, setup, include=FALSE}
# Not sure if necessary... was an issue with a version conflict... try new version in the future again...
# devtools::install_version("RPostgres", version = "1.2.1", repos = "http://cran.us.r-project.org")

### General options
# Ask R for English-language messages (the effect of LANG is platform dependent).
Sys.setenv(LANG = "en")
# Avoid scientific notation (scipen) and print 4 significant digits.
options(scipen = 100, digits = 4)

### Workspace
# `rm(list = ls())` was removed on purpose: run interactively it silently
# deletes everything in the user's global environment, yet it does not give a
# clean session (attached packages and options survive). Restart R instead.
# Close graphics devices left open by earlier runs.
graphics.off()

### Load packages (maybe need to be installed first)
# Standard
library(tidyverse) # General DS toolkit
library(magrittr) # For advanced piping

# Databases
library(DBI) # GEneral R database interface
library(RPostgres) # PostgreSQL interface driver 
library(dbplyr) # for dplyr with databases

# networks
library(tidygraph)
library(ggraph)
library(ggrepel)

# Load functions
source("functions/functions_relatedness.R")
```

```{r}
# Bring the database settings into the session; the var_* objects used below
# are presumably defined by this script (it is not visible from here).
source("../variables.R")

# Open a connection to the existing PostgreSQL database, requiring SSL.
con <- DBI::dbConnect(
  RPostgres::Postgres(),
  dbname = var_dbname,
  host = var_host,
  port = var_port,
  user = var_user,
  password = var_password,
  sslmode = "require"
)

# Remove the connection settings (including the password) from the workspace.
rm(list = c("var_dbname", "var_host", "var_password", "var_port", "var_user"))
```

```{r}
# con %>% dbListTables() %>% sort()
```

# Introduction

* In this notebook, we present the first results of an analysis of green development paths of Nordic regions
* It is based on patent data from 1990-2015 (PATSTAT, Autumn 2021 Edition)
* Analysis is done on all Nordic NUTS 3 regions (fractionalized patent allocation by inventor location, DOCDB family level)
* Industries are captured by NACE2 codes of patents according to the OECD IPC-NACE2 concordance table.
* Green patents are identified using the Y02 tag in the CPC classification

## Preprocessing 

```{r}
## LOAD DATA

# Patent tables, read from the .rds files in ../temp
data_appln         <- readr::read_rds("../temp/data_appln.rds")
data_docdb_fam_cpc <- readr::read_rds("../temp/data_docdb_fam_cpc.rds")
data_pers_appln    <- readr::read_rds("../temp/data_pers_appln.rds")
data_person        <- readr::read_rds("../temp/data_person.rds")
data_nace2         <- readr::read_rds("../temp/data_nace2.rds")

# Additional objects: technology graph and region-level tables
g_tech      <- readr::read_rds("../temp/g_tech.rds")
region_RTA  <- readr::read_rds("../temp/region_RTA.rds")
region_tech <- readr::read_rds("../temp/region_tech.rds")
```


```{r}
## Analysis parameters
# Number of top regions (ranked by green patent count) kept for the plots.
n_regions <- 10
# Patent-count threshold; the misspelled name is kept because other code may
# refer to it.
cuttoff <- 50
```

```{r}
# Per-region patent totals, shared by both region selections below.
#   n       = sum of weight_frac over all patents of the region
#   n_Y     = sum of weight_frac over the Y-tagged (green) patents
#   share_Y = green share of the region's patents
# `n` already contains the green patents, so the share is n_Y / n; the earlier
# n_Y / (n + n_Y) counted the green patents twice in the denominator.
region_summary <- region_tech %>%
  group_by(nuts) %>%
  summarise(n = sum(weight_frac, na.rm = TRUE),
            n_Y = sum(weight_frac * Y_tag, na.rm = TRUE),
            .groups = "drop") %>%
  mutate(share_Y = n_Y / n) %>%
  arrange(desc(n_Y))

# Select region: hand-picked regions that are always part of the analysis
select_regions <- region_summary %>%
  filter(nuts %in% c('SE232', 'NO043', 'DK012'))
```

```{r}
# Restrict to top N regions (by green patent count) plus the hand-picked ones
top_regions <- region_summary %>%
  #filter(n_Y >= cuttoff ) %>%
  # seq_len() is safe for n_regions == 0, where 1:n_regions would yield c(1, 0)
  slice(seq_len(n_regions)) %>%
  bind_rows(select_regions) %>%
  # a hand-picked region may already be in the top N: keep one row per region
  distinct(nuts, .keep_all = TRUE)
```

```{r}
# Create a data frame with the technology relatedness edge list.
# The integer node indices in from/to are replaced by the node names, and
# pct_rank holds the percent rank of each edge weight.
tech_rel <- g_tech %E>%
  mutate(
    from_nace = .N()$name[from],
    to_nace = .N()$name[to]
  ) %>%
  as_tibble() %>%
  mutate(
    from = as.character(from_nace),
    to = as.character(to_nace),
    pct_rank = percent_rank(weight)
  ) %>%
  arrange(from, to)

# Make the edge list symmetric: append every edge with from/to swapped, then
# keep the first row of each (from, to) pair. Only from/to are swapped; the
# from_nace/to_nace columns keep their original orientation.
tech_rel <- tech_rel %>%
  rename(from = to, to = from) %>%
  relocate(from, to) %>%
  bind_rows(tech_rel, .) %>%
  distinct(from, to, .keep_all = TRUE)
```

```{r}
# Augment the regions data with a country code (first two characters of the
# nuts code).
# NOTE: should be done earlier, when the table is constructed
region_RTA <- region_RTA %>%
  mutate(country = str_sub(nuts, 1, 2))
```

```{r}
# Summarize regions: one row per region (nuts), period and Y tag.
#   n_spec       = sum of rta_bin, i.e. number of fields flagged in rta_bin
#   n_spec_count = sum of n_tech_region over the flagged fields
#   HHI          = sum of squared percentage shares of n_tech_region
# HHI now ignores missing values like the other two summaries, so a single NA
# no longer turns the whole index into NA. `.groups = "drop"` returns an
# ungrouped table without the summarise() grouping message.
region_RTA_agg <- region_RTA %>%
  group_by(nuts, period, Y_tag) %>%
  summarise(
    n_spec = sum(rta_bin, na.rm = TRUE),
    n_spec_count = sum(n_tech_region * rta_bin, na.rm = TRUE),
    HHI = sum((n_tech_region / sum(n_tech_region, na.rm = TRUE) * 100)^2,
              na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(country = str_sub(nuts, 1, 2),
         nuts_period = paste(nuts, 'P', period))
```

```{r}
# Region x technology-field table with the change in specialization.
# rta_dev compares rta_bin with the preceding row of the same
# (nuts, nace_group, Y_tag) series after sorting by period:
#   -1 = lower than before, 0 = unchanged, 1 = higher than before,
#   NA where there is no preceding row or a value is missing.
tech_dev <- region_RTA %>%
  select(period, country, nuts, nace_group, Y_tag, n_tech_region, rta, rta_bin) %>%
  arrange(nuts, nace_group, Y_tag, period) %>%
  group_by(nuts, nace_group, Y_tag) %>%
  mutate(
    rta_dev = case_when(
      rta_bin > lag(rta_bin, 1) ~ 1,
      rta_bin < lag(rta_bin, 1) ~ -1,
      rta_bin == lag(rta_bin, 1) ~ 0
    )
  ) %>%
  ungroup()
```

# Patent application development

* In the following, a brief descriptive analysis of the development of green and non-green patent applications in the Nordics
* In addition, a breakdown of green patents by top green patenting regions

```{r}
# Line chart: number of applications per filing year, one line per Y tag value.
ggplot(
  data = count(data_appln, appln_filing_year, Y_tag),
  mapping = aes(x = appln_filing_year, y = n, colour = Y_tag)
) +
  geom_line(key_glyph = "timeseries") +
  labs(
    x = 'Year',
    y = 'Number applications',
    colour = 'Green',
    title = 'Patent applications: Development',
    subtitle = 'All Nordic contries, by Y tag'
  )
```
```{r}
# Line chart for the top regions only: weight_frac-weighted applications per
# filing year and region, one facet per Y tag value with free axis scales.
data_pers_appln %>%
  semi_join(top_regions, by = 'nuts') %>%
  count(appln_filing_year, nuts, Y_tag, wt = weight_frac) %>%
  ggplot(mapping = aes(x = appln_filing_year, y = n, colour = nuts)) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(Y_tag), scales = 'free') +
  labs(
    x = 'Year',
    y = 'Number applications, by region and Y tag',
    colour = 'Nuts3',
    title = 'Patent applications: Development',
    subtitle = 'All Nordic contries'
  )
```

# Technology space general

* We calculate the relatedness of industries by co-occurrence patterns following Hidalgo & Hausmann (2007)
* Revealed technological advantage (RTA) is calculated separately for Y-tag and non-Y-tag patents.

```{r}
# Fixed Fruchterman-Reingold layout of the technology graph; the seed keeps the
# node positions reproducible between runs.
set.seed(1337)
# layout_with_fr() is the current name of the deprecated
# layout.fruchterman.reingold(). It returns an unnamed two-column matrix, so
# the columns are named before the conversion to avoid tibble's name-repair
# warning for unnamed matrices.
coords_tech <- igraph::layout_with_fr(g_tech)
colnames(coords_tech) <- c("x", "y")
coords_tech <- as_tibble(coords_tech)
```

```{r, fig.width=15, fig.height= 12.5}
# Network plot of the industry space on the precomputed layout (coords_tech).
# Node labels are truncated to 50 characters and drawn only for nodes whose
# dgr lies in the upper half of the percent ranks.
g_tech %N>%
  mutate(nace_group_name = str_trunc(nace_group_name, width = 50, side = 'right')) %>%
  ggraph(layout = coords_tech) +
  geom_edge_link(aes(width = weight, alpha = weight), colour = "grey") +
  geom_node_point(aes(colour = nace_sec_name, size = dgr)) +
  geom_node_text(
    aes(label = nace_group_name, size = dgr, filter = percent_rank(dgr) >= 0.5),
    repel = TRUE
  ) +
  theme_void() +
  theme(legend.position = "bottom") +
  labs(
    title = 'Industry Space (all Nordics)',
    subtitle = 'Nodes = NACE 2 Industries. Edges: Relatedness'
  )
```

# Regional specialization (RTA) development

* Comparison of specialization profiles in periods 1 and 2

```{r}
# p1: top regions, one row per region and period; the three summaries are
# spread into *_Y_tag_FALSE / *_Y_tag_TRUE columns (missing combinations = 0).
p1 <- region_RTA_agg %>%
  semi_join(top_regions, by = 'nuts') %>%
  pivot_wider(
    names_from = Y_tag,
    names_prefix = 'Y_tag_',
    values_from = c(n_spec, n_spec_count, HHI),
    values_fill = 0
  )

# p2: one row per region, the specialization counts of each period side by
# side (column suffix = period value).
p2 <- p1 %>%
  select(period, nuts, n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE) %>%
  pivot_wider(
    names_from = period,
    values_from = c(n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE)
  )
```

```{r, fig.width= 10, fig.height=10}
# Scatter of non-green vs. green specialization counts per region and period
# (p1), with an arrow per region from its period-1 to its period-2 position
# (p2; the _1/_2 column suffixes are the period values).
p1 %>%
  ggplot(aes(x = n_spec_Y_tag_FALSE, y = n_spec_Y_tag_TRUE)) +
  geom_point(aes(size = n_spec_count_Y_tag_TRUE, col = HHI_Y_tag_TRUE)) +
  geom_text(aes(label = nuts), position = position_dodge(0.9), alpha = 0.75) +
  geom_segment(data = p2,
               aes(x = n_spec_Y_tag_FALSE_1,
                   y = n_spec_Y_tag_TRUE_1,
                   xend = n_spec_Y_tag_FALSE_2,
                   yend = n_spec_Y_tag_TRUE_2),
               # constant width set outside aes(): inside aes() the value 1
               # was treated as data and trained into the size scale that the
               # points share
               size = 1,
               alpha = 0.15,
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"),
               show.legend = FALSE) +
  # NOTE(review): the HHI computed in this notebook is built from percentage
  # shares (x 100), so midpoint = 1 sits at the very bottom of its range --
  # confirm the intended midpoint.
  scale_color_gradient2(low = "skyblue", mid = 'yellow', high = "red", midpoint = 1) +
  # the empty `note = ''` entry was dropped: `note` is not a label that labs()
  # draws
  labs(title = 'Development of new regional specializations',
       subtitle = 'By number of green and non green specializations in period 1 and 2',
       x = 'N non-green specializations',
       y = 'N green specializations',
       size = 'N green patents',
       col = 'HHI green patents')
```

```{r}
# Remove both plotting helpers; the earlier `rm(p2, p2)` left p1 behind and
# warned that p2 was not found on its second mention.
rm(p1, p2)
```

# Analysis for existing green paths:

```{r}
# Regions with an existing green path in period 1: at least one green field
# flagged in rta_bin and at least 100 patents (n_tech_region) in such fields.
#
# summarise() evaluates its expressions in order, and a summary that reuses an
# input column's name replaces that column for the expressions after it. The
# region total `n_tech_region` is therefore computed LAST; previously it came
# first, so n_green_region and n_green_rta multiplied by the region total
# instead of the per-field counts.
select_regions_green <- tech_dev %>%
  group_by(nuts, period) %>%
  summarise(
    green = sum(Y_tag * rta, na.rm = TRUE),
    green_bin = sum(Y_tag * rta_bin, na.rm = TRUE),
    n_green_region = sum(Y_tag * n_tech_region, na.rm = TRUE),
    n_green_rta = sum(Y_tag * n_tech_region * rta_bin, na.rm = TRUE),
    n_tech_region = sum(n_tech_region, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(n_green_rta >= 100,
         green_bin >= 1,
         period == '1') %>%
  select(nuts)
```

```{r}
# Net change in specializations per region: sum of rta_dev over all fields
# with at least 50 patents (n_tech_region), separately for green and non-green
# fields, spread into the columns Y_spec_FALSE / Y_spec_TRUE (missing = 0).
# `.groups = "drop"` returns an ungrouped table without the summarise()
# grouping message.
tech_spec_dev <- tech_dev %>%
  filter(n_tech_region >= 50) %>%
  group_by(nuts, Y_tag) %>%
  summarise(rta_dev = sum(rta_dev, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rta_dev, values_fill = 0, names_prefix = 'Y_spec_')
```

```{r}
# Relatedness of each new green specialization (period 2) to the fields the
# same region was already specialized in during period 1.
# Result: one row per region (nuts), new green field (nace_group) and Y tag of
# the related period-1 specialization, with the max, sum and mean edge weight.
# `.groups = "drop"` returns an ungrouped table without the summarise()
# grouping message.
tech_rel_dev <- tech_rel %>%
  select(from, to, weight) %>%
  # replicate every edge for each region in which the `from` field occurs
  left_join(tech_dev %>% distinct(nace_group, nuts), by = c('from' = 'nace_group')) %>%
  # filter for rta in period 1 (Y_tag now describes the related `to` field)
  inner_join(
    tech_dev %>%
      filter(period == '1', rta_bin == 1) %>%
      select(nace_group, nuts, Y_tag),
    by = c('to' = 'nace_group', 'nuts')
  ) %>%
  # filter for new green specialization in period 2
  semi_join(
    tech_dev %>% filter(period == '2', rta_bin == 1, rta_dev == 1, Y_tag == TRUE),
    by = c('from' = 'nace_group', 'nuts')
  ) %>%
  rename(nace_group = from, related_techn = to) %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel_max = max(weight),
            rel_sum = sum(weight),
            rel_mean = mean(weight),
            .groups = "drop")
```

```{r, fig.width= 10, fig.height=10}
# Region-level scatter: mean (over new green specializations) of the largest
# relatedness to period-1 non-green (x) and green (y) specializations.
# Point size = patents (n_tech_region) in green fields flagged in rta_bin in
# period 2; only regions in the upper half of that count are shown.
tech_rel_dev %>%
  group_by(nuts, Y_tag) %>%
  summarise(rel = mean(rel_max), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(
    tech_dev %>%
      filter(Y_tag == TRUE, period == '2', rta_bin == 1) %>%
      count(nuts, wt = n_tech_region),
    by = 'nuts'
  ) %>%
  mutate(country = str_sub(nuts, 1, 2)) %>%
  #semi_join(top_regions, by = 'nuts') %>%
  filter(0.5 <= percent_rank(n)) %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n)) +
  geom_point(aes(col = country)) +
  geom_text_repel(aes(label = nuts), box.padding = 0.5, max.overlaps = Inf) +
  theme(legend.position = "bottom") +
  # `caption` instead of `note`: labs() has no `note` label, so the
  # explanatory text was never drawn
  labs(title = 'New green specialization period 2',
       subtitle = 'By nuts regions',
       caption = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
       x = 'Relatedness non-green',
       y = 'Relatedness green',
       size = 'N green patents')
```

```{r, fig.width= 10, fig.height=10}
# Field-level scatter for the top regions, one facet per region: for every new
# green specialization, its largest relatedness to period-1 non-green (x) and
# green (y) specializations. Point size = patents (n_tech_region) of that green
# field in period 2.
tech_rel_dev %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel = mean(rel_max), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(
    tech_dev %>%
      filter(Y_tag == TRUE, period == '2', rta_bin == 1) %>%
      count(nuts, nace_group, wt = n_tech_region),
    by = c('nuts', 'nace_group')
  ) %>%
  mutate(country = str_sub(nuts, 1, 2)) %>%
  semi_join(top_regions, by = 'nuts') %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n)) +
  geom_point() +
  geom_text_repel(aes(label = nace_group), box.padding = 0.5) +
  facet_wrap(vars(nuts)) +
  # `caption` instead of `note`: labs() has no `note` label, so the
  # explanatory text was never drawn
  labs(title = 'New green specialization period 2',
       subtitle = 'By nuts regions',
       caption = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
       x = 'Relatedness non-green',
       y = 'Relatedness green',
       size = 'N green patents')
```

# TODO

* Label 2nd figure on green, label coinciding, maybe rta, share or growth
* Do some radar plotting, eg: https://r-graph-gallery.com/web-circular-barplot-with-R-and-ggplot2.html 

```{r}
# Release the PostgreSQL connection `con` opened near the top of the notebook;
# it was never closed. The guards make the chunk safe to re-run and to run
# when the connection was never created.
if (exists("con") && DBI::dbIsValid(con)) {
  DBI::dbDisconnect(con)
}

# Record R and package versions for reproducibility.
sessionInfo()
```


